# REPLICATION CODE FOR REILLY S. STEEL. 2024. "PARTISAN BIAS IN SECURITIES ENFORCEMENT."
# JOURNAL OF LAW, ECONOMICS, & ORGANIZATION
# INVESTIGATION ANALYSIS

#### SETUP ####

# NOTE(review): this removes every object (including dot-prefixed ones) from
# the global environment of whoever runs the script; it does not detach
# packages or reset options(), so it is not a substitute for a fresh R session.
rm(list = ls(all.names = TRUE)) # clear environment

# load packages

library(tidyverse)
library(lubridate)
library(stringr)
library(fedmatch)
library(tidystringdist)
library(pdftools)
library(readxl)
library(fixest)
library(datawizard)
library(texreg)
library(tikzDevice)
library(kableExtra)
library(cobalt)

# NOTE(review): machine-specific path; point this at the local copy of the
# replication folder before running.
setwd("~/Desktop/Projects/SEC Enforcement/Replication") # set working directory

# Every table and figure below is written to Output/. Create the folder up
# front (silently if it already exists) so a fresh copy of the replication
# folder does not fail at the first file write.
dir.create("Output", showWarnings = FALSE)

# load data

load("Data/data_for_investigation_analysis.RData")

#### DESCRIPTIVE STATS ####

## balance table

# Restrict to firm-years with no missing values on firm party, alignment, and
# the political control variables, then add a 0/1 indicator for FF_12 == 11
# (presumably the finance category of the industry classification).
balance_table_data <- firms_inv %>%
  drop_na(firm_party, firm_alignment, log_tot_lagged, sbc_rep, hfsc_rep,
          senApp_rep, houseApp_rep, log_lobbying_lagged,
          log_lobbying_lagged_sec_rel, log_tot_comm_contribs_lagged) %>%
  mutate(finance_industry = as.numeric(FF_12 == 11))

# Covariate balance with respect to firm_alignment (the "treatment" side of
# the formula) across firm financials, district characteristics, political
# variables, and recent litigation/restatement flags.
balance_table <- bal.tab(
  firm_alignment ~ at + capx + ceq + ch + dt + emp + ni + oiadp + revt + roa +
    leverage + op_margin + cd_med_hh_inc + cd_unemp_rate + cd_finance_workers +
    tot_lagged + sbc_rep + senApp_rep + hfsc_rep + houseApp_rep + recent_ca +
    recent_restatement + finance_industry,
  data = balance_table_data
)

# Move the covariate names out of the row names into a regular column so they
# survive the conversion to a tibble when the table is exported.
balance_table$Balance[["Variable"]] <- rownames(balance_table$Balance)
rownames(balance_table$Balance) <- NULL

## targeted v. non-targeted firms by party control

# Means (ignoring missing values) of the descriptive variables for one subset
# of firm-years. Returns a one-column matrix (the transposed one-row summary)
# whose row names are the statistic names, e.g. "mean_assets".
firm_group_means <- function(data) {
  data %>%
    summarize(mean_assets = mean(at, na.rm = TRUE),
              mean_leverage = mean(leverage, na.rm = TRUE),
              mean_emp = mean(emp, na.rm = TRUE),
              mean_rev = mean(revt, na.rm = TRUE),
              mean_ni = mean(ni, na.rm = TRUE),
              mean_roa = mean(roa, na.rm = TRUE),
              mean_op_margin = mean(op_margin, na.rm = TRUE),
              mean_sbc_rep = mean(sbc_rep, na.rm = TRUE),
              mean_hfsc_rep = mean(hfsc_rep, na.rm = TRUE),
              mean_sen_app_rep = mean(senApp_rep, na.rm = TRUE),
              mean_house_app_rep = mean(houseApp_rep, na.rm = TRUE),
              mean_firm_party = mean(firm_party, na.rm = TRUE),
              mean_tot_lagged = mean(tot_lagged, na.rm = TRUE)) %>%
    t()
}

# One summary per cell of (sec_party: 0 = "dem", 1 = "repub") x
# (investig_dummy: 1 = targets, 0 = nontargets).
dem_targets <- firms_inv %>%
  filter(sec_party == 0 & investig_dummy == 1) %>%
  firm_group_means()

repub_targets <- firms_inv %>%
  filter(sec_party == 1 & investig_dummy == 1) %>%
  firm_group_means()

dem_nontargets <- firms_inv %>%
  filter(sec_party == 0 & investig_dummy == 0) %>%
  firm_group_means()

repub_nontargets <- firms_inv %>%
  filter(sec_party == 1 & investig_dummy == 0) %>%
  firm_group_means()

# Side-by-side comparison: one row per statistic, one column per group.
firm_stats <- tibble(variable = rownames(dem_targets),
                     dem_targets = as.numeric(dem_targets[,1]),
                     dem_nontargets = as.numeric(dem_nontargets[,1]),
                     repub_targets = as.numeric(repub_targets[,1]),
                     repub_nontargets = as.numeric(repub_nontargets[,1]))

# LaTeX table of group means for targeted vs. non-targeted firms.
# firm_stats has one row per statistic, in the order the means were computed,
# so the display labels are assigned by position.
firm_stats_table <- firm_stats %>%
  mutate(variable = c("Assets",
                      "Leverage",
                      "Employment",
                      "Revenue",
                      "Net Income",
                      "Return on Assets",
                      "Operating Margin",
                      "SBC",
                      "HFSC",
                      "S. Approp.",
                      "H. Approp.",
                      "Firm Party",
                      "Total Contribs.")) %>%
  # display order: Firm Party, Total Contribs., the four committee variables,
  # then the seven financial variables
  slice(c(12, 13, 8:11, 1:7)) %>%
  rename(" " = variable,
         "Targets (D)" = dem_targets,
         "Nontargets (D)" = dem_nontargets,
         "Targets (R)" = repub_targets,
         "Nontargets (R)" = repub_nontargets) %>%
  kbl(format = "latex", digits = 2) %>%
  kable_classic_2(full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Democratic SEC" = 2, "Republican SEC" = 2))

## plot timing

# Mean and standard deviation of the firm party score within each firm.
# Firms with a single non-missing observation get an NA standard deviation.
firm_party_stats <- firms_inv %>%
  group_by(gvkey) %>%
  summarize(firm_party_mean = mean(firm_party, na.rm = TRUE),
            firm_party_sd = sd(firm_party, na.rm = TRUE))

# Mean and standard deviation of the firm party score within each year; the
# yearly mean is later used to split investigated firms into two groups.
firm_party_by_year <- firms_inv %>%
  group_by(year) %>%
  summarize(firm_party_mean = mean(firm_party, na.rm = TRUE),
            firm_party_sd = sd(firm_party, na.rm = TRUE))

# na.rm = TRUE is needed here as in the other statistics: sd() returns NA if
# any firm_party value is missing.
sd(firms_inv$firm_party, na.rm = TRUE) # sd of firm party score is 0.31
mean(firm_party_stats$firm_party_sd, na.rm = TRUE) # mean within-firm firm party sd = 0.07
mean(firm_party_by_year$firm_party_sd, na.rm = TRUE) # mean within-year firm party sd = 0.31
mean(firms_inv$firm_party, na.rm = TRUE) # mean firm party score is 0.6

# Split investigations by whether the firm's party score is at or above
# (rep_firm_dummy = 1) or below (rep_firm_dummy = 0) the yearly mean score.
# The join uses every column the two tables share (dplyr prints which ones);
# presumably that is just `year`. Rows with a missing comparison are dropped
# from both groups by the filters.
rep_firm_targets <- invs %>%
  left_join(firm_party_by_year) %>%
  mutate(open_date = as.Date(open_date),
         rep_firm_dummy = as.numeric(firm_party >= firm_party_mean)) %>%
  filter(rep_firm_dummy == 1)

dem_firm_targets <- invs %>%
  left_join(firm_party_by_year) %>%
  mutate(open_date = as.Date(open_date),
         rep_firm_dummy = as.numeric(firm_party >= firm_party_mean)) %>%
  filter(rep_firm_dummy == 0)

# Density of investigation opening dates for the two firm groups, drawn over
# two shaded periods (red, then blue) separated by a vertical line at the
# 2009-01-20 boundary. The dates are named inside local() so that no helper
# objects are left in the global environment; the plot keeps a reference to
# them for when it is rendered.
inv_date_density <- local({
  window_start <- ymd("2002-01-20")
  handover <- ymd("2009-01-20")
  window_end <- ymd("2015-01-20")
  density_cap <- 0.0003 # top of the y axis and of the shaded rectangles

  ggplot() +
    geom_rect(aes(xmin = window_start, xmax = handover,
                  ymin = 0, ymax = density_cap),
              fill = "red", alpha = 0.2) +
    geom_rect(aes(xmin = handover, xmax = window_end,
                  ymin = 0, ymax = density_cap),
              fill = "blue", alpha = 0.15) +
    geom_vline(xintercept = handover, linetype = "solid",
               color = "black", alpha = 0.75) +
    # kernel = "r" is matched to the rectangular kernel
    geom_density(data = rep_firm_targets, aes(x = open_date),
                 color = "Red", kernel = "r") +
    geom_density(data = dem_firm_targets, aes(x = open_date),
                 color = "Blue", kernel = "r") +
    coord_cartesian(xlim = c(window_start, window_end),
                    ylim = c(0, density_cap), expand = FALSE) +
    scale_x_date(breaks = seq.Date(ymd("2007-01-20"), window_end, "4 years"),
                 minor_breaks = seq.Date(window_start, window_end, "1 year")) +
    theme_bw() +
    labs(x = "Date", y = "Density")
})

#### ESTIMATE MODELS ####

## logit / lpm

## shared formula pieces

# Covariates that enter every specification, in estimation order.
inv_political_controls <- c("firm_party", "firm_alignment", "log_tot_lagged",
                            "sbc_rep", "hfsc_rep", "senApp_rep", "houseApp_rep",
                            "log_lobbying_lagged", "log_lobbying_lagged_sec_rel",
                            "log_tot_comm_contribs_lagged")

# Additional firm-level covariates used in the richer specifications.
inv_firm_controls <- c("recent_ca", "log_at", "leverage", "roa", "log_emp",
                       "op_margin")

# Build the fixest formula for one outcome and one of three specifications:
#   "base"        - political controls; firm and year fixed effects
#   "firm"        - adds the firm-level controls; firm and year-by-finance FEs
#   "restatement" - as "firm", plus recent_restatement (entered before the
#                   firm-level controls)
# Returns a formula of the form `outcome ~ covariates | fixed effects`.
inv_formula <- function(outcome, spec = c("base", "firm", "restatement")) {
  spec <- match.arg(spec)
  covariates <- switch(spec,
                       base = inv_political_controls,
                       firm = c(inv_political_controls, inv_firm_controls),
                       restatement = c(inv_political_controls,
                                       "recent_restatement",
                                       inv_firm_controls))
  fixed_effects <- if (spec == "base") {
    "gvkey + year"
  } else {
    "gvkey + year^finance_industry"
  }
  as.formula(paste(outcome, "~", paste(covariates, collapse = " + "),
                   "|", fixed_effects),
             env = globalenv())
}

## logit / lpm

m1 <- feglm(inv_formula("investig_dummy", "base"),
            data = firms_inv, family = binomial(link = "logit"))

m1.25 <- feglm(inv_formula("investig_dummy", "firm"),
               data = firms_inv, family = binomial(link = "logit"))

m1.5 <- feglm(inv_formula("investig_dummy", "restatement"),
              data = firms_inv, family = binomial(link = "logit"))

m2 <- feols(inv_formula("investig_dummy", "base"), data = firms_inv)

m2.25 <- feols(inv_formula("investig_dummy", "firm"), data = firms_inv)

m2.5 <- feols(inv_formula("investig_dummy", "restatement"), data = firms_inv)

# same logit specifications, estimated on firms_inv_nc
m1_nc <- feglm(inv_formula("investig_dummy", "base"),
               data = firms_inv_nc, family = binomial(link = "logit"))

m1.25_nc <- feglm(inv_formula("investig_dummy", "firm"),
                  data = firms_inv_nc, family = binomial(link = "logit"))

m1.5_nc <- feglm(inv_formula("investig_dummy", "restatement"),
                 data = firms_inv_nc, family = binomial(link = "logit"))

## count models

m3 <- fepois(inv_formula("n_investigs", "base"), data = firms_inv)

m3.25 <- fepois(inv_formula("n_investigs", "firm"), data = firms_inv)

m3.5 <- fepois(inv_formula("n_investigs", "restatement"), data = firms_inv)

m4 <- feols(inv_formula("log_n_investigs", "base"), data = firms_inv)

m4.25 <- feols(inv_formula("log_n_investigs", "firm"), data = firms_inv)

m4.5 <- feols(inv_formula("log_n_investigs", "restatement"), data = firms_inv)

# same Poisson specifications, estimated on firms_inv_nc
m3_nc <- fepois(inv_formula("n_investigs", "base"), data = firms_inv_nc)

m3.25_nc <- fepois(inv_formula("n_investigs", "firm"), data = firms_inv_nc)

m3.5_nc <- fepois(inv_formula("n_investigs", "restatement"), data = firms_inv_nc)

#### TABLES ####

# Main results table: the three logit models (investig_dummy) followed by the
# three Poisson models (n_investigs). Only the four headline coefficients are
# printed; the remaining controls are summarized in the added rows at the
# bottom of the table.
texreg(
  list(m1, m1.25, m1.5, m3, m3.25, m3.5),
  file = "Output/inv_main_table.tex",
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "+",
  digits = 3,
  custom.model.names = rep(c("Inv.", "N. Inv."), each = 3),
  custom.coef.map = list(
    "firm_party" = "Firm Party",
    "firm_alignment" = "Alignment",
    "recent_ca" = "Recent Class Action",
    "recent_restatement" = "Recent Restatement"
  ),
  custom.gof.rows = list(
    "Firm FE" = rep("Yes", 6),
    "Year FE" = rep(c("Yes", "", ""), times = 2),
    "Year--Finance FE" = rep(c("", "Yes", "Yes"), times = 2),
    "Political Controls" = rep("Yes", 6),
    "Financial Controls" = rep(c("No", "Yes", "Yes"), times = 2)
  )
)

# Full version of the main results table: same six models, but every
# covariate is printed, so only the fixed-effects rows are added at the bottom.
texreg(
  list(m1, m1.25, m1.5, m3, m3.25, m3.5),
  file = "Output/inv_main_table_full.tex",
  stars = c(0.001, 0.01, 0.05, 0.1),
  symbol = "+",
  digits = 3,
  custom.model.names = rep(c("Inv.", "N. Inv."), each = 3),
  # the order of this map is the row order of the printed table
  custom.coef.map = list(
    "firm_party" = "Firm Party",
    "firm_alignment" = "Alignment",
    "recent_ca" = "Recent Class Action",
    "recent_restatement" = "Recent Restatement",
    "log_tot_lagged" = "Log(Tot. Contribs.)",
    "log_tot_comm_contribs_lagged" = "Log(Tot. Comm. Contribs.)",
    "log_lobbying_lagged" = "Log(Tot. Lobbying Exp.)",
    "log_lobbying_lagged_sec_rel" = "Log(Tot. SEC-Rel. Lobbying Exp.)",
    "sbc_rep" = "SBC",
    "hfsc_rep" = "HFSC",
    "senApp_rep" = "S. Approp.",
    "houseApp_rep" = "H. Approp.",
    "log_at" = "Log(Assets)",
    "log_emp" = "Log(Employment)",
    "leverage" = "Leverage",
    "roa" = "ROA",
    "op_margin" = "Operating Margin"
  ),
  custom.gof.rows = list(
    "Firm FE" = rep("Yes", 6),
    "Year FE" = rep(c("Yes", "", ""), times = 2),
    "Year--Finance FE" = rep(c("", "Yes", "Yes"), times = 2)
  )
)



# Write the targets vs. non-targets descriptive table to disk.
save_kable(firm_stats_table, file = "Output/firm_stats_table_investigs.tex")

# Write the balance table: put the covariate name first, drop the adjusted
# correlation column, and remove rows whose name contains "<NA>".
balance_table$Balance %>%
  as_tibble() %>%
  relocate(Variable) %>%
  dplyr::select(-Corr.Adj) %>%
  filter(!str_detect(Variable, "<NA>")) %>%
  kbl(format = "latex", digits = 2) %>%
  kable_classic_2(full_width = FALSE) %>%
  save_kable(file = "Output/balance_table_investigs.tex")


#### PLOTS ####

# NOTE(review): "tz" is not a documented base R option and "CA" is not a
# standard time-zone name; presumably a machine-specific workaround for a
# time-zone lookup problem when rendering the date axis. Confirm before
# changing or removing.
options(tz="CA")

# Render the investigation-date density plot to a TikZ file (5 x 4 inches).
tikz(file = "Output/inv_density.tex", width = 5, height = 4)

print(inv_date_density)

# Close the device so the .tex file is flushed to disk.
dev.off()